package top.jiangqiang.crawler.core.entities;

import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import top.jiangqiang.crawler.core.utils.DocumentUtil;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * Crawler entity for HTML pages.
 *
 * <p>Extends {@link TextCrawler} with convenience methods that hand the
 * textual content to Jsoup for parsing, or to {@link DocumentUtil} for URL
 * extraction. Every method reads the content through
 * {@code getContent(Charset)} and the page address through {@code getUrl()};
 * neither is declared in this class (presumably inherited from
 * {@link TextCrawler} — confirm there).
 */
@Getter
@Setter
@NoArgsConstructor
@Slf4j
public class HtmlCrawler extends TextCrawler {

    /**
     * Parses the content of this crawler into a Jsoup document.
     *
     * @param charset charset handed to {@code getContent(Charset)}
     *                (presumably used to decode the raw content — see
     *                {@link TextCrawler})
     * @return the document produced by {@code Jsoup.parse}, with
     *         {@code getUrl()} supplied as the base URI
     */
    public Document getDocument(Charset charset) {
        final String html = getContent(charset);
        final String baseUri = getUrl();
        return Jsoup.parse(html, baseUri);
    }

    /**
     * Parses the content of this crawler into a Jsoup document, requesting the
     * content with UTF-8.
     *
     * @return the document produced by {@code Jsoup.parse}, with
     *         {@code getUrl()} supplied as the base URI
     */
    public Document getDocument() {
        final String html = getContent(StandardCharsets.UTF_8);
        final String baseUri = getUrl();
        return Jsoup.parse(html, baseUri);
    }

    /**
     * Collects the URLs found in the content, requesting the content with
     * UTF-8. Delegates to {@link #getAllUrl(Boolean, Charset)}.
     *
     * @param strong flag forwarded unchanged to {@code DocumentUtil.getAllUrl};
     *               its exact meaning is defined there
     * @return the list returned by {@code DocumentUtil.getAllUrl}
     */
    public List<String> getAllUrl(Boolean strong) {
        final Charset defaultCharset = StandardCharsets.UTF_8;
        return getAllUrl(strong, defaultCharset);
    }

    /**
     * Collects the URLs found in the content.
     *
     * @param strong  flag forwarded unchanged to
     *                {@code DocumentUtil.getAllUrl}; its exact meaning is
     *                defined there
     * @param charset charset handed to {@code getContent(Charset)}
     * @return the list returned by {@code DocumentUtil.getAllUrl} for the
     *         content, {@code getUrl()} and {@code strong}
     */
    public List<String> getAllUrl(Boolean strong, Charset charset) {
        final String html = getContent(charset);
        final String pageUrl = getUrl();
        return DocumentUtil.getAllUrl(html, pageUrl, strong);
    }
}
